{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 第七节 LangChain 表达式语言 (LCEL)_2\n",
    "\n",
    "https://langchain114.com/docs/expression_language\n",
    "\n",
    "https://blog.csdn.net/Attitude93/article/details/136531425"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导入语言模型\n",
    "import os\n",
    "from langchain_community.llms import Tongyi\n",
    "from langchain_community.llms import SparkLLM\n",
    "from langchain_community.llms import QianfanLLMEndpoint\n",
    "\n",
    "import pandas as pd\n",
    "#导入模版\n",
    "from langchain.prompts import PromptTemplate\n",
    "\n",
    "#导入聊天模型\n",
    "from langchain.prompts.chat import (\n",
    "    ChatPromptTemplate,\n",
    "    SystemMessagePromptTemplate,\n",
    "    AIMessagePromptTemplate,\n",
    "    HumanMessagePromptTemplate,\n",
    ")\n",
    "from langchain.schema import (\n",
    "    AIMessage,\n",
    "    HumanMessage,\n",
    "    SystemMessage\n",
    ")\n",
    "\n",
    "from langchain_community.chat_models import ChatSparkLLM\n",
    "from langchain_community.chat_models.tongyi import ChatTongyi\n",
    "from langchain_community.chat_models import QianfanChatEndpoint\n",
    "\n",
    "from langchain.chains import LLMChain\n",
    "\n",
    "#输入三个模型各自的key\n",
    "\n",
    "os.environ[\"DASHSCOPE_API_KEY\"] = \"\"\n",
    "\n",
    "os.environ[\"IFLYTEK_SPARK_APP_ID\"] = \"\"\n",
    "os.environ[\"IFLYTEK_SPARK_API_KEY\"] = \"\"\n",
    "os.environ[\"IFLYTEK_SPARK_API_SECRET\"] = \"\"\n",
    "\n",
    "os.environ[\"QIANFAN_AK\"] = \"\"\n",
    "os.environ[\"QIANFAN_SK\"] = \"\"\n",
    "\n",
    "from langchain.prompts import ChatPromptTemplate\n",
    "from langchain_core.output_parsers import StrOutputParser #仅仅是让输出对象成为字符串\n",
    "\n",
    "from langchain_core.runnables import RunnablePassthrough\n",
    "from langchain.vectorstores import Chroma\n",
    "from langchain_community.embeddings import QianfanEmbeddingsEndpoint\n",
    "\n",
    "from operator import itemgetter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_ty = Tongyi(temperature=0.1)\n",
    "model_qf = QianfanLLMEndpoint(temperature=0.1)\n",
    "chat_ty = ChatTongyi()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 自定义函数\n",
    "\n",
    "RunnableLambda\n",
    "\n",
    "https://langchain114.com/docs/expression_language/how_to/functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.runnables import RunnableLambda\n",
    "\n",
    "\n",
    "def length_function(text):\n",
    "    return len(text)\n",
    "\n",
    "def _multiple_length_function(text1, text2):\n",
    "    return len(text1) * len(text2)\n",
    "\n",
    "def multiple_length_function(_dict):\n",
    "    return _multiple_length_function(_dict[\"text1\"], _dict[\"text2\"])\n",
    "\n",
    "prompt = ChatPromptTemplate.from_template(\"what is {a} + {b}\")\n",
    "chain1 = prompt | model_ty\n",
    "chain =(\n",
    "{\n",
    "    \"a\": itemgetter(\"foo\") | RunnableLambda(length_function),\n",
    "    \"b\": {\"text1\": itemgetter(\"foo\"), \"text2\": itemgetter(\"bar\")}\n",
    "    | RunnableLambda(multiple_length_function),\n",
    "}\n",
    "| prompt\n",
    "| model_ty\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'10'"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain1.invoke({\"a\":3,\"b\":7})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'12'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain.invoke({\"foo\":\"bar\",\"bar\":\"gah\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 根据输入动态路由逻辑\n",
    "\n",
    "有两种执行路由的方式：\n",
    "\n",
    "* 使用 RunnableBranch。\n",
    "* 编写自定义工厂函数，该函数接受前一步的输入并返回一个 runnable。重要的是，这应该返回一个 runnable 而不是实际执行。\n",
    "\n",
    "https://langchain114.com/docs/expression_language/how_to/routing\n",
    "\n",
    "* 基于语义相似性的路由\n",
    "\n",
    "https://langchain114.com/docs/expression_language/cookbook/embedding_router\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO] [04-02 13:50:42] openapi_requestor.py:316 [t:3748]: requesting llm api endpoint: /embeddings/embedding-v1\n"
     ]
    }
   ],
   "source": [
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.utils.math import cosine_similarity\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n",
    "\n",
    "physics_template = \"\"\"你是一个非常聪明的物理学教授。\n",
    "你擅长以简洁易懂的方式回答物理问题。\n",
    "当你不知道答案时，你会承认不知道。\n",
    "\n",
    "这是一个问题：\n",
    "{query}\"\"\"\n",
    "\n",
    "math_template = \"\"\"你是一个非常优秀的数学家。\n",
    "你擅长回答数学问题。\n",
    "你之所以优秀是因为你能够将难题分解为组成部分，\n",
    "回答组成部分，然后把它们组合起来回答更广泛的问题。\n",
    "\n",
    "这是一个问题：\n",
    "{query}\"\"\"\n",
    "\n",
    "embeddings = QianfanEmbeddingsEndpoint()\n",
    "prompt_templates = [physics_template, math_template]\n",
    "prompt_embeddings = embeddings.embed_documents(prompt_templates)\n",
    "\n",
    "\n",
    "def prompt_router(input):\n",
    "    query_embedding = embeddings.embed_query(input[\"query\"])\n",
    "    similarity = cosine_similarity([query_embedding], prompt_embeddings)[0]\n",
    "    most_similar = prompt_templates[similarity.argmax()]\n",
    "    print(\"使用数学\" if most_similar == math_template else \"使用物理\")\n",
    "    return PromptTemplate.from_template(most_similar)\n",
    "\n",
    "chain = (\n",
    "    {\"query\": RunnablePassthrough()}\n",
    "    | RunnableLambda(prompt_router)\n",
    "    | ChatTongyi()\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(prompt_embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO] [04-02 13:50:44] openapi_requestor.py:316 [t:3748]: requesting llm api endpoint: /embeddings/embedding-v1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "使用物理\n",
      "黑洞是宇宙中一种极其奇特的天体，它的存在基于爱因斯坦的广义相对论。简单来说，黑洞是由极度密集的物质，通常是恒星在死亡后塌缩形成的，其引力强大到连光也无法逃脱，因此得名“黑洞”（英文为Black Hole）。\n",
      "\n",
      "在一个黑洞的事件视界内，任何东西一旦进入，就再也无法离开，包括光线。这个边界被称为“奇点”，其中物质的密度和引力场无限大。我们不能直接观察黑洞，因为它们不发射或反射光线，但我们可以通过测量周围物体的运动和辐射来推断它们的存在。\n",
      "\n",
      "黑洞的研究是现代天体物理学的重要领域，它们帮助科学家们理解宇宙的极端条件和引力的强大力量。虽然我们不能亲眼看到黑洞，但通过理论计算和间接证据，科学家们正在逐步揭示它们的秘密。\n"
     ]
    }
   ],
   "source": [
    "print(chain.invoke(\"什么是黑洞\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[INFO] [04-02 13:50:54] openapi_requestor.py:316 [t:3748]: requesting llm api endpoint: /embeddings/embedding-v1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "使用数学\n",
      "路径积分是量子力学和数学物理学中的一个重要概念，由理论物理学家理查德·费曼在20世纪40年代提出。它是一种计算量子系统中粒子行为的方法，尤其是在没有经典解析解的情况下。在量子力学中，路径积分可以理解为所有可能路径（从一个位置到另一个位置）的贡献之和，每个路径都有一个对应的复数概率幅度。\n",
      "\n",
      "简单来说，路径积分可以看作是对路径空间进行的一种积分，这里的“路径”指的是粒子从一个点移动到另一个点的所有可能轨迹。在经典力学中，我们通常只考虑一条确定的路径，但在量子力学中，由于波粒二象性，粒子的行为表现为概率分布，所以我们需要考虑所有可能的路径并加权求和。\n",
      "\n",
      "路径积分公式通常用于薛定谔方程的量子化形式，即所谓的费曼路径积分，它是量子场论的基础之一，对于量子电动力学、量子色动力学等现代粒子物理学理论具有关键作用。\n"
     ]
    }
   ],
   "source": [
    "print(chain.invoke(\"什么是路径积分\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### classwork 1\n",
    "\n",
    "* 改写上面基于相似性路由代码，加入以为化学专家，是的程序能基于问题在三个专家中选择最符合的专家回答"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基于LECL实现通过python程序来完成问题回答\n",
    "\n",
    "* classwork2 完成此节代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.utilities import PythonREPL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9\n"
     ]
    }
   ],
   "source": [
    "print(3*3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Python REPL can execute arbitrary code. Use with caution.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'9\\n'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "PythonREPL().run('print(3*3)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 提出问题——语言模型写代码回答问题——从回答中提取代码——运行代码=结果"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "template_code=\"请通过写python代码来回答对应问题，此问题是：{question} \\n 注意仅需要通过python代码回答，不需要文字 \\n 代码形式如下: ```python\\n...\\n```\"\n",
    "t_code=PromptTemplate.from_template(template_code)\n",
    "\n",
    "def get_code(x):\n",
    "    return x.split(\"```python\")[1].split(\"```\")[0]\n",
    "\n",
    "chain_code=(t_code|ChatTongyi()|StrOutputParser()|RunnableLambda(get_code)|PythonREPL().run)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "#chain_code.invoke({\"question\":\"请帮我获取网页'https://abs.hznu.edu.cn/c/2024-03-29/2936058.shtml'中的内容\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(chain_pro.invoke({\"question\":\"2乘以8等于几\"}))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "current_path = os.getcwd()\n",
    "\n",
    "print(current_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基于LCEL实现自动查询 SQL 数据库\n",
    "\n",
    "* sqlite数据库下载\n",
    "\n",
    "链接：https://pan.baidu.com/s/1_nvpBZcrZ_3dgYD1HKX3rg?pwd=kp0h \n",
    "提取码：kp0h\n",
    "\n",
    "RunnablePassthrough.assign(schema=get_schema,response=lambda x: run_query(x[\"query\"]))\n",
    "\n",
    "* classwork3 完成此节代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 基于对应数据提出问题——语言模型基于问题生成sql——提取sql——执行sql——综合所有信息生成模板——语言模型基于所有信息进行回答"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.utilities import SQLDatabase"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "db = SQLDatabase.from_uri(\"sqlite:///ali_langchain.db\")\n",
    "\n",
    "def get_schema(_):\n",
    "    return db.get_table_info()\n",
    "    \n",
    "def run_query(query):\n",
    "    return db.run(query)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[(182880,)]'"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "run_query(\"SELECT COUNT(*) FROM alidata\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "#print(db.get_table_info())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "template_sql=\"请通过写sql代码来回答对应问题，并且需要基于如下数据库信息：{info} \\n 需要回答问题是：{question} \\n 注意仅需要通过sql代码回答，不需要文字 \\n 代码形式如下: ```sql\\n...\\n```\"\n",
    "t_sql=PromptTemplate.from_template(template_sql)\n",
    "\n",
    "def get_sql(x):\n",
    "    return x.split(\"```sql\")[1].split(\"```\")[0]\n",
    "\n",
    "chain_sql=({\"info\":get_schema,\"question\":RunnablePassthrough()}|t_sql|ChatTongyi()|StrOutputParser()|RunnableLambda(get_sql))\n",
    "\n",
    "template_sql0=\"请通过综合如下的数据库信息，问题，sql代码，sql代码的执行结果给出问题的自然语言回答：\\n 数据库信息{info} \\n 需要回答问题是：{question} \\n sql代码：{query} \\n sql代码执行结果: {res}\"\n",
    "t_sql0=PromptTemplate.from_template(template_sql0)\n",
    "\n",
    "chain_sql0=({\"info\":get_schema,\"question\":RunnablePassthrough(),\"query\":chain_sql}|RunnablePassthrough.assign(res=lambda x: run_query(x[\"query\"]))|t_sql0|ChatTongyi()|StrOutputParser())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'在alidata表中，记录数最多的五个品牌（按数量降序）分别是：\\n\\n1. 品牌7868，出现了3356次\\n2. 品牌11196，出现了2147次\\n3. 品牌11080，出现了1688次\\n4. 品牌12220，出现了1682次\\n5. 品牌21110，出现了1597次'"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain_sql0.invoke(\"记录数最多的五个brand是\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[(182880,)]'"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "run_query(\"\\nSELECT COUNT(*) FROM alidata;\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIMessage(content='在alidata表中，有9531个不同的品牌。')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain_sql2.invoke(\"表里有多少brand？\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'[(182880,)]'"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "run_query('\\nSELECT COUNT(*) FROM alidata;\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "参考链接：\n",
    "\n",
    "https://langchain114.com/docs/expression_language/cookbook/sql_db\n",
    "\n",
    "https://langchain114.com/docs/expression_language/cookbook/code_writing"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
